1   /*
2    * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
3    * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4    *
5    * This code is free software; you can redistribute it and/or modify it
6    * under the terms of the GNU General Public License version 2 only, as
7    * published by the Free Software Foundation.  Oracle designates this
8    * particular file as subject to the "Classpath" exception as provided
9    * by Oracle in the LICENSE file that accompanied this code.
10   *
11   * This code is distributed in the hope that it will be useful, but WITHOUT
12   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14   * version 2 for more details (a copy is included in the LICENSE file that
15   * accompanied this code).
16   *
17   * You should have received a copy of the GNU General Public License version
18   * 2 along with this work; if not, write to the Free Software Foundation,
19   * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20   *
21   * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22   * or visit www.oracle.com if you need additional information or have any
23   * questions.
24   */
25  
26  package sun.font;
27  
28  import java.nio.ByteBuffer;
29  import java.nio.CharBuffer;
30  import java.nio.IntBuffer;
31  import java.util.Locale;
32  import java.nio.charset.*;
33  
34  /*
35   * A tt font has a CMAP table which is in turn made up of sub-tables which
36   * describe the char to glyph mapping in (possibly) multiple ways.
37   * CMAP subtables are described by 3 values.
38   * 1. Platform ID (eg 3=Microsoft, which is the id we look for in JDK)
39   * 2. Encoding (eg 0=symbol, 1=unicode)
40   * 3. TrueType subtable format (how the char->glyph mapping for the encoding
41   * is stored in the subtable). See the TrueType spec. Format 4 is required
42   * by MS in fonts for windows. It uses segmented mapping to delta values.
43   * The subtables we most typically see are (3,1,4):
44   * CMAP Platform ID=3 is what we use.
45   * Encodings that are used in practice by JDK on Solaris are
46   *  symbol (3,0)
47   *  unicode (3,1)
48   *  GBK (3,5) (note that solaris zh fonts report 3,4 but are really 3,5)
49   * The format for almost all subtables is 4. However the solaris (3,5)
50   * encodings are typically in format 2.
51   */
52  abstract class CMap {
53  
54  //     static char WingDings_b2c[] = {
55  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
56  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
57  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
58  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
59  //         0xfffd, 0xfffd, 0x2702, 0x2701, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
60  //         0xfffd, 0x2706, 0x2709, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
61  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
62  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2707, 0x270d,
63  //         0xfffd, 0x270c, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
64  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
65  //         0xfffd, 0x2708, 0xfffd, 0xfffd, 0x2744, 0xfffd, 0x271e, 0xfffd,
66  //         0x2720, 0x2721, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
67  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
68  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
69  //         0xfffd, 0x2751, 0x2752, 0xfffd, 0xfffd, 0x2756, 0xfffd, 0xfffd,
70  //         0xfffd, 0xfffd, 0xfffd, 0x2740, 0x273f, 0x275d, 0x275e, 0xfffd,
71  //         0xfffd, 0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
72  //         0x2787, 0x2788, 0x2789, 0xfffd, 0x278a, 0x278b, 0x278c, 0x278d,
73  //         0x278e, 0x278f, 0x2790, 0x2791, 0x2792, 0x2793, 0xfffd, 0xfffd,
74  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
75  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x274d, 0xfffd,
76  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2736, 0x2734, 0xfffd, 0x2735,
77  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x272a, 0x2730, 0xfffd,
78  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
79  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x27a5, 0xfffd, 0x27a6, 0xfffd,
80  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
81  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
82  //         0x27a2, 0xfffd, 0xfffd, 0xfffd, 0x27b3, 0xfffd, 0xfffd, 0xfffd,
83  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
84  //         0x27a1, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
85  //         0x27a9, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
86  //         0xfffd, 0xfffd, 0xfffd, 0x2717, 0x2713, 0xfffd, 0xfffd, 0xfffd,
87  //    };
88  
89  //     static char Symbols_b2c[] = {
90  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
91  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
92  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
93  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
94  //         0xfffd, 0xfffd, 0x2200, 0xfffd, 0x2203, 0xfffd, 0xfffd, 0x220d,
95  //         0xfffd, 0xfffd, 0x2217, 0xfffd, 0xfffd, 0x2212, 0xfffd, 0xfffd,
96  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
97  //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
98  //         0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
99  //         0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
100 //         0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
101 //         0x039e, 0x03a8, 0x0396, 0xfffd, 0x2234, 0xfffd, 0x22a5, 0xfffd,
102 //         0xfffd, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
103 //         0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
104 //         0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
105 //         0x03be, 0x03c8, 0x03b6, 0xfffd, 0xfffd, 0xfffd, 0x223c, 0xfffd,
106 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
107 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
108 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
109 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
110 //         0xfffd, 0x03d2, 0xfffd, 0x2264, 0x2215, 0x221e, 0xfffd, 0xfffd,
111 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
112 //         0x2218, 0xfffd, 0xfffd, 0x2265, 0xfffd, 0x221d, 0xfffd, 0x2219,
113 //         0xfffd, 0x2260, 0x2261, 0x2248, 0x22ef, 0x2223, 0xfffd, 0xfffd,
114 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2297, 0x2295, 0x2205, 0x2229,
115 //         0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
116 //         0xfffd, 0x2207, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x221a, 0x22c5,
117 //         0xfffd, 0x2227, 0x2228, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
118 //         0x22c4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0x2211, 0xfffd, 0xfffd,
119 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
120 //         0xfffd, 0xfffd, 0x222b, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
121 //         0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
122 //     };
123 
124     static final short ShiftJISEncoding = 2; // encoding ID of (3,2) ShiftJIS subtables
125     static final short GBKEncoding      = 3; // encoding ID of (3,3) GBK subtables
126     static final short Big5Encoding     = 4; // encoding ID of (3,4) Big5 subtables
127     static final short WansungEncoding  = 5; // encoding ID of (3,5) Wansung subtables
128     static final short JohabEncoding    = 6; // encoding ID of (3,6) Johab subtables
129     static final short MSUnicodeSurrogateEncoding = 10; // encoding ID of (3,10) MS Unicode surrogate subtables
130 
131     static final char noSuchChar = (char)0xfffd; // "no mapping" marker used while building converter tables
132     static final int SHORTMASK = 0x0000ffff; // keeps the low 16 bits of an int
133     static final int INTMASK   = 0xffffffff; // NOTE(review): as an int this literal is -1, so "x & INTMASK" leaves an int x unchanged, sign included
134 
135     static final char[][] converterMaps = new char[7][]; // filled lazily by getConverterMap, indexed directly by encoding ID
136 
137     /*
138      * Unicode->other encoding translation array. A pre-computed look up
139      * which can be shared across all fonts using that encoding.
140      * Using this saves running character converters repeatedly.
141      */
142     char[] xlat; // null when the subtable is looked up with the char code as given (see getGlyph in CMapFormat4)
143 
144     static CMap initialize(TrueTypeFont font) {
145 
146         CMap cmap = null;
147 
148         int offset, platformID, encodingID=-1;
149 
150         int three0=0, three1=0, three2=0, three3=0, three4=0, three5=0,
151             three6=0, three10=0;
152         boolean threeStar = false;
153 
154         ByteBuffer cmapBuffer = font.getTableBuffer(TrueTypeFont.cmapTag);
155         int cmapTableOffset = font.getTableSize(TrueTypeFont.cmapTag);
156         short numberSubTables = cmapBuffer.getShort(2);
157 
158         /* locate the offsets of all 3,*  (ie Microsoft platform) encodings */
159         for (int i=0; i<numberSubTables; i++) {
160             cmapBuffer.position(i * 8 + 4);
161             platformID = cmapBuffer.getShort();
162             if (platformID == 3) {
163                 threeStar = true;
164                 encodingID = cmapBuffer.getShort();
165                 offset     = cmapBuffer.getInt();
166                 switch (encodingID) {
167                 case 0:  three0  = offset; break; // MS Symbol encoding
168                 case 1:  three1  = offset; break; // MS Unicode cmap
169                 case 2:  three2  = offset; break; // ShiftJIS cmap.
170                 case 3:  three3  = offset; break; // GBK cmap
171                 case 4:  three4  = offset; break; // Big 5 cmap
172                 case 5:  three5  = offset; break; // Wansung
173                 case 6:  three6  = offset; break; // Johab
174                 case 10: three10 = offset; break; // MS Unicode surrogates
175                 }
176             }
177         }
178 
179         /* This defines the preference order for cmap subtables */
180         if (threeStar) {
181             if (three10 != 0) {
182                 cmap = createCMap(cmapBuffer, three10, null);
183             }
184             else if  (three0 != 0) {
185                 /* The special case treatment of these fonts leads to
186                  * anomalies where a user can view "wingdings" and "wingdings2"
187                  * and the latter shows all its code points in the unicode
188                  * private use area at 0xF000->0XF0FF and the former shows
189                  * a scattered subset of its glyphs that are known mappings to
190                  * unicode code points.
191                  * The primary purpose of these mappings was to facilitate
192                  * display of symbol chars etc in composite fonts, however
193                  * this is not needed as all these code points are covered
194                  * by Lucida Sans Regular.
195                  * Commenting this out reduces the role of these two files
196                  * (assuming that they continue to be used in font.properties)
197                  * to just one of contributing to the overall composite
198                  * font metrics, and also AWT can still access the fonts.
199                  * Clients which explicitly accessed these fonts as names
200                  * "Symbol" and "Wingdings" (ie as physical fonts) and
201                  * expected to see a scattering of these characters will
202                  * see them now as missing. How much of a problem is this?
203                  * Perhaps we could still support this mapping just for
204                  * "Symbol.ttf" but I suspect some users would prefer it
205                  * to be mapped in to the Latin range as that is how
206                  * the "symbol" font is used in native apps.
207                  */
208 //              String name = font.platName.toLowerCase(Locale.ENGLISH);
209 //              if (name.endsWith("symbol.ttf")) {
210 //                  cmap = createSymbolCMap(cmapBuffer, three0, Symbols_b2c);
211 //              } else if (name.endsWith("wingding.ttf")) {
212 //                  cmap = createSymbolCMap(cmapBuffer, three0, WingDings_b2c);
213 //              } else {
214                     cmap = createCMap(cmapBuffer, three0, null);
215 //              }
216             }
217             else if (three1 != 0) {
218                 cmap = createCMap(cmapBuffer, three1, null);
219             }
220             else if (three2 != 0) {
221                 cmap = createCMap(cmapBuffer, three2,
222                                   getConverterMap(ShiftJISEncoding));
223             }
224             else if (three3 != 0) {
225                 cmap = createCMap(cmapBuffer, three3,
226                                   getConverterMap(GBKEncoding));
227             }
228             else if (three4 != 0) {
229                 /* GB2312 TrueType fonts on Solaris have wrong encoding ID for
230                  * cmap table, these fonts have EncodingID 4 which is Big5
231                  * encoding according the TrueType spec, but actually the
232                  * fonts are using gb2312 encoding, have to use this
233                  * workaround to make Solaris zh_CN locale work.  -sherman
234                  */
235                 if (FontUtilities.isSolaris && font.platName != null &&
236                     (font.platName.startsWith(
237                      "/usr/openwin/lib/locale/zh_CN.EUC/X11/fonts/TrueType") ||
238                      font.platName.startsWith(
239                      "/usr/openwin/lib/locale/zh_CN/X11/fonts/TrueType") ||
240                      font.platName.startsWith(
241                      "/usr/openwin/lib/locale/zh/X11/fonts/TrueType"))) {
242                     cmap = createCMap(cmapBuffer, three4,
243                                        getConverterMap(GBKEncoding));
244                 }
245                 else {
246                     cmap = createCMap(cmapBuffer, three4,
247                                       getConverterMap(Big5Encoding));
248                 }
249             }
250             else if (three5 != 0) {
251                 cmap = createCMap(cmapBuffer, three5,
252                                   getConverterMap(WansungEncoding));
253             }
254             else if (three6 != 0) {
255                 cmap = createCMap(cmapBuffer, three6,
256                                   getConverterMap(JohabEncoding));
257             }
258         } else {
259             /* No 3,* subtable was found. Just use whatever is the first
260              * table listed. Not very useful but maybe better than
261              * rejecting the font entirely?
262              */
263             cmap = createCMap(cmapBuffer, cmapBuffer.getInt(8), null);
264         }
265         return cmap;
266     }
267 
268     /* speed up the converting by setting the range for double
269      * byte characters;
270      */
271     static char[] getConverter(short encodingID) {
272         int dBegin = 0x8000;
273         int dEnd   = 0xffff;
274         String encoding;
275 
276         switch (encodingID) {
277         case ShiftJISEncoding:
278             dBegin = 0x8140;
279             dEnd   = 0xfcfc;
280             encoding = "SJIS";
281             break;
282         case GBKEncoding:
283             dBegin = 0x8140;
284             dEnd   = 0xfea0;
285             encoding = "GBK";
286             break;
287         case Big5Encoding:
288             dBegin = 0xa140;
289             dEnd   = 0xfefe;
290             encoding = "Big5";
291             break;
292         case WansungEncoding:
293             dBegin = 0xa1a1;
294             dEnd   = 0xfede;
295             encoding = "EUC_KR";
296             break;
297         case JohabEncoding:
298             dBegin = 0x8141;
299             dEnd   = 0xfdfe;
300             encoding = "Johab";
301             break;
302         default:
303             return null;
304         }
305 
306         try {
307             char[] convertedChars = new char[65536];
308             for (int i=0; i<65536; i++) {
309                 convertedChars[i] = noSuchChar;
310             }
311 
312             byte[] inputBytes = new byte[(dEnd-dBegin+1)*2];
313             char[] outputChars = new char[(dEnd-dBegin+1)];
314 
315             int j = 0;
316             int firstByte;
317             if (encodingID == ShiftJISEncoding) {
318                 for (int i = dBegin; i <= dEnd; i++) {
319                     firstByte = (i >> 8 & 0xff);
320                     if (firstByte >= 0xa1 && firstByte <= 0xdf) {
321                         //sjis halfwidth katakana
322                         inputBytes[j++] = (byte)0xff;
323                         inputBytes[j++] = (byte)0xff;
324                     } else {
325                         inputBytes[j++] = (byte)firstByte;
326                         inputBytes[j++] = (byte)(i & 0xff);
327                     }
328                 }
329             } else {
330                 for (int i = dBegin; i <= dEnd; i++) {
331                     inputBytes[j++] = (byte)(i>>8 & 0xff);
332                     inputBytes[j++] = (byte)(i & 0xff);
333                 }
334             }
335 
336             Charset.forName(encoding).newDecoder()
337             .onMalformedInput(CodingErrorAction.REPLACE)
338             .onUnmappableCharacter(CodingErrorAction.REPLACE)
339             .replaceWith("\u0000")
340             .decode(ByteBuffer.wrap(inputBytes, 0, inputBytes.length),
341                     CharBuffer.wrap(outputChars, 0, outputChars.length),
342                     true);
343 
344             // ensure single byte ascii
345             for (int i = 0x20; i <= 0x7e; i++) {
346                 convertedChars[i] = (char)i;
347             }
348 
349             //sjis halfwidth katakana
350             if (encodingID == ShiftJISEncoding) {
351                 for (int i = 0xa1; i <= 0xdf; i++) {
352                     convertedChars[i] = (char)(i - 0xa1 + 0xff61);
353                 }
354             }
355 
356             /* It would save heap space (approx 60Kbytes for each of these
357              * converters) if stored only valid ranges (ie returned
358              * outputChars directly. But this is tricky since want to
359              * include the ASCII range too.
360              */
361 //          System.err.println("oc.len="+outputChars.length);
362 //          System.err.println("cc.len="+convertedChars.length);
363 //          System.err.println("dbegin="+dBegin);
364             System.arraycopy(outputChars, 0, convertedChars, dBegin,
365                              outputChars.length);
366 
367             //return convertedChars;
368             /* invert this map as now want it to map from Unicode
369              * to other encoding.
370              */
371             char [] invertedChars = new char[65536];
372             for (int i=0;i<65536;i++) {
373                 if (convertedChars[i] != noSuchChar) {
374                     invertedChars[convertedChars[i]] = (char)i;
375                 }
376             }
377             return invertedChars;
378 
379         } catch (Exception e) {
380             e.printStackTrace();
381         }
382         return null;
383     }
384 
385     /*
386      * The returned array maps to unicode from some other 2 byte encoding
387      * eg for a 2byte index which represents a SJIS char, the indexed
388      * value is the corresponding unicode char.
389      */
390     static char[] getConverterMap(short encodingID) {
391         if (converterMaps[encodingID] == null) {
392            converterMaps[encodingID] = getConverter(encodingID);
393         }
394         return converterMaps[encodingID];
395     }
396 
397 
398     static CMap createCMap(ByteBuffer buffer, int offset, char[] xlat) {
399         /* First do a sanity check that this cmap subtable is contained
400          * within the cmap table.
401          */
402         int subtableFormat = buffer.getChar(offset);
403         long subtableLength;
404         if (subtableFormat < 8) {
405             subtableLength = buffer.getChar(offset+2);
406         } else {
407             subtableLength = buffer.getInt(offset+4) & INTMASK;
408         }
409         if (offset+subtableLength > buffer.capacity()) {
410             if (FontUtilities.isLogging()) {
411                 FontUtilities.getLogger().warning("Cmap subtable overflows buffer.");
412             }
413         }
414         switch (subtableFormat) {
415         case 0:  return new CMapFormat0(buffer, offset);
416         case 2:  return new CMapFormat2(buffer, offset, xlat);
417         case 4:  return new CMapFormat4(buffer, offset, xlat);
418         case 6:  return new CMapFormat6(buffer, offset, xlat);
419         case 8:  return new CMapFormat8(buffer, offset, xlat);
420         case 10: return new CMapFormat10(buffer, offset, xlat);
421         case 12: return new CMapFormat12(buffer, offset, xlat);
422         default: throw new RuntimeException("Cmap format unimplemented: " +
423                                             (int)buffer.getChar(offset));
424         }
425     }
426 
427 /*
428     final char charVal(byte[] cmap, int index) {
429         return (char)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
430     }
431 
432     final short shortVal(byte[] cmap, int index) {
433         return (short)(((0xff & cmap[index]) << 8)+(0xff & cmap[index+1]));
434     }
435 */
436     abstract char getGlyph(int charCode); // maps a char code to a glyph code; CMapFormat0 and CMapFormat4 return 0 when they find no mapping
437 
438     /* Format 4 Header is
439      * ushort format (off=0)
440      * ushort length (off=2)
441      * ushort language (off=4)
442      * ushort segCountX2 (off=6)
443      * ushort searchRange (off=8)
444      * ushort entrySelector (off=10)
445      * ushort rangeShift (off=12)
446      * ushort endCount[segCount] (off=14)
447      * ushort reservedPad
448      * ushort startCount[segCount]
449      * short idDelta[segCount]
450      * idRangeOFfset[segCount]
451      * ushort glyphIdArray[]
452      */
453     static class CMapFormat4 extends CMap {
454         int segCount;
455         int entrySelector;
456         int rangeShift;
457         char[] endCount;
458         char[] startCount;
459         short[] idDelta;
460         char[] idRangeOffset;
461         char[] glyphIds;
462 
463         CMapFormat4(ByteBuffer bbuffer, int offset, char[] xlat) {
464 
465             this.xlat = xlat;
466 
467             bbuffer.position(offset);
468             CharBuffer buffer = bbuffer.asCharBuffer();
469             buffer.get(); // skip, we already know format=4
470             int subtableLength = buffer.get();
471             /* Try to recover from some bad fonts which specify a subtable
472              * length that would overflow the byte buffer holding the whole
473              * cmap table. If this isn't a recoverable situation an exception
474              * may be thrown which is caught higher up the call stack.
475              * Whilst this may seem lenient, in practice, unless the "bad"
476              * subtable we are using is the last one in the cmap table we
477              * would have no way of knowing about this problem anyway.
478              */
479             if (offset+subtableLength > bbuffer.capacity()) {
480                 subtableLength = bbuffer.capacity() - offset;
481             }
482             buffer.get(); // skip language
483             segCount = buffer.get()/2;
484             int searchRange = buffer.get();
485             entrySelector = buffer.get();
486             rangeShift    = buffer.get()/2;
487             startCount = new char[segCount];
488             endCount = new char[segCount];
489             idDelta = new short[segCount];
490             idRangeOffset = new char[segCount];
491 
492             for (int i=0; i<segCount; i++) {
493                 endCount[i] = buffer.get();
494             }
495             buffer.get(); // 2 bytes for reserved pad
496             for (int i=0; i<segCount; i++) {
497                 startCount[i] = buffer.get();
498             }
499 
500             for (int i=0; i<segCount; i++) {
501                 idDelta[i] = (short)buffer.get();
502             }
503 
504             for (int i=0; i<segCount; i++) {
505                 char ctmp = buffer.get();
506                 idRangeOffset[i] = (char)((ctmp>>1)&0xffff);
507             }
508             /* Can calculate the number of glyph IDs by subtracting
509              * "pos" from the length of the cmap
510              */
511             int pos = (segCount*8+16)/2;
512             buffer.position(pos);
513             int numGlyphIds = (subtableLength/2 - pos);
514             glyphIds = new char[numGlyphIds];
515             for (int i=0;i<numGlyphIds;i++) {
516                 glyphIds[i] = buffer.get();
517             }
518 /*
519             System.err.println("segcount="+segCount);
520             System.err.println("entrySelector="+entrySelector);
521             System.err.println("rangeShift="+rangeShift);
522             for (int j=0;j<segCount;j++) {
523               System.err.println("j="+j+ " sc="+(int)(startCount[j]&0xffff)+
524                                  " ec="+(int)(endCount[j]&0xffff)+
525                                  " delta="+idDelta[j] +
526                                  " ro="+(int)idRangeOffset[j]);
527             }
528 
529             //System.err.println("numglyphs="+glyphIds.length);
530             for (int i=0;i<numGlyphIds;i++) {
531                   System.err.println("gid["+i+"]="+(int)glyphIds[i]);
532             }
533 */
534         }
535 
536         char getGlyph(int charCode) {
537 
538             int index = 0;
539             char glyphCode = 0;
540 
541             int controlGlyph = getControlCodeGlyph(charCode, true);
542             if (controlGlyph >= 0) {
543                 return (char)controlGlyph;
544             }
545 
546             /* presence of translation array indicates that this
547              * cmap is in some other (non-unicode encoding).
548              * In order to look-up a char->glyph mapping we need to
549              * translate the unicode code point to the encoding of
550              * the cmap.
551              * REMIND: VALID CHARCODES??
552              */
553             if (xlat != null) {
554                 charCode = xlat[charCode];
555             }
556 
557             /*
558              * Citation from the TrueType (and OpenType) spec:
559              *   The segments are sorted in order of increasing endCode
560              *   values, and the segment values are specified in four parallel
561              *   arrays. You search for the first endCode that is greater than
562              *   or equal to the character code you want to map. If the
563              *   corresponding startCode is less than or equal to the
564              *   character code, then you use the corresponding idDelta and
565              *   idRangeOffset to map the character code to a glyph index
566              *   (otherwise, the missingGlyph is returned).
567              */
568 
569             /*
570              * CMAP format4 defines several fields for optimized search of
571              * the segment list (entrySelector, searchRange, rangeShift).
572              * However, benefits are neglible and some fonts have incorrect
573              * data - so we use straightforward binary search (see bug 6247425)
574              */
575             int left = 0, right = startCount.length;
576             index = startCount.length >> 1;
577             while (left < right) {
578                 if (endCount[index] < charCode) {
579                     left = index + 1;
580                 } else {
581                     right = index;
582                 }
583                 index = (left + right) >> 1;
584             }
585 
586             if (charCode >= startCount[index] && charCode <= endCount[index]) {
587                 int rangeOffset = idRangeOffset[index];
588 
589                 if (rangeOffset == 0) {
590                     glyphCode = (char)(charCode + idDelta[index]);
591                 } else {
592                     /* Calculate an index into the glyphIds array */
593 
594 /*
595                     System.err.println("rangeoffset="+rangeOffset+
596                                        " charCode=" + charCode +
597                                        " scnt["+index+"]="+(int)startCount[index] +
598                                        " segCnt="+segCount);
599 */
600 
601                     int glyphIDIndex = rangeOffset - segCount + index
602                                          + (charCode - startCount[index]);
603                     glyphCode = glyphIds[glyphIDIndex];
604                     if (glyphCode != 0) {
605                         glyphCode = (char)(glyphCode + idDelta[index]);
606                     }
607                 }
608             }
609             if (glyphCode != 0) {
610             //System.err.println("cc="+Integer.toHexString((int)charCode) + " gc="+(int)glyphCode);
611             }
612             return glyphCode;
613         }
614     }
615 
616     // Format 0: Byte Encoding table
617     static class CMapFormat0 extends CMap {
618         byte [] cmap;
619 
620         CMapFormat0(ByteBuffer buffer, int offset) {
621 
622             /* skip 6 bytes of format, length, and version */
623             int len = buffer.getChar(offset+2);
624             cmap = new byte[len-6];
625             buffer.position(offset+6);
626             buffer.get(cmap);
627         }
628 
629         char getGlyph(int charCode) {
630             if (charCode < 256) {
631                 if (charCode < 0x0010) {
632                     switch (charCode) {
633                     case 0x0009:
634                     case 0x000a:
635                     case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
636                     }
637                 }
638                 return (char)(0xff & cmap[charCode]);
639             } else {
640                 return 0;
641             }
642         }
643     }
644 
645 //     static CMap createSymbolCMap(ByteBuffer buffer, int offset, char[] syms) {
646 
647 //      CMap cmap = createCMap(buffer, offset, null);
648 //      if (cmap == null) {
649 //          return null;
650 //      } else {
651 //          return new CMapFormatSymbol(cmap, syms);
652 //      }
653 //     }
654 
655 //     static class CMapFormatSymbol extends CMap {
656 
657 //      CMap cmap;
658 //      static final int NUM_BUCKETS = 128;
659 //      Bucket[] buckets = new Bucket[NUM_BUCKETS];
660 
661 //      class Bucket {
662 //          char unicode;
663 //          char glyph;
664 //          Bucket next;
665 
666 //          Bucket(char u, char g) {
667 //              unicode = u;
668 //              glyph = g;
669 //          }
670 //      }
671 
672 //      CMapFormatSymbol(CMap cmap, char[] syms) {
673 
674 //          this.cmap = cmap;
675 
676 //          for (int i=0;i<syms.length;i++) {
677 //              char unicode = syms[i];
678 //              if (unicode != noSuchChar) {
679 //                  char glyph = cmap.getGlyph(i + 0xf000);
680 //                  int hash = unicode % NUM_BUCKETS;
681 //                  Bucket bucket = new Bucket(unicode, glyph);
682 //                  if (buckets[hash] == null) {
683 //                      buckets[hash] = bucket;
684 //                  } else {
685 //                      Bucket b = buckets[hash];
686 //                      while (b.next != null) {
687 //                          b = b.next;
688 //                      }
689 //                      b.next = bucket;
690 //                  }
691 //              }
692 //          }
693 //      }
694 
695 //      char getGlyph(int unicode) {
696 //          if (unicode >= 0x1000) {
697 //              return 0;
698 //          }
699 //          else if (unicode >=0xf000 && unicode < 0xf100) {
700 //              return cmap.getGlyph(unicode);
701 //          } else {
702 //              Bucket b = buckets[unicode % NUM_BUCKETS];
703 //              while (b != null) {
704 //                  if (b.unicode == unicode) {
705 //                      return b.glyph;
706 //                  } else {
707 //                      b = b.next;
708 //                  }
709 //              }
710 //              return 0;
711 //          }
712 //      }
713 //     }
714 
715     // Format 2: High-byte mapping through table
716     static class CMapFormat2 extends CMap {
717 
718         char[] subHeaderKey = new char[256];
719          /* Store subheaders in individual arrays
720           * A SubHeader entry theortically looks like {
721           *   char firstCode;
722           *   char entryCount;
723           *   short idDelta;
724           *   char idRangeOffset;
725           * }
726           */
727         char[] firstCodeArray;
728         char[] entryCountArray;
729         short[] idDeltaArray;
730         char[] idRangeOffSetArray;
731 
732         char[] glyphIndexArray;
733 
734         CMapFormat2(ByteBuffer buffer, int offset, char[] xlat) {
735 
736             this.xlat = xlat;
737 
738             int tableLen = buffer.getChar(offset+2);
739             buffer.position(offset+6);
740             CharBuffer cBuffer = buffer.asCharBuffer();
741             char maxSubHeader = 0;
742             for (int i=0;i<256;i++) {
743                 subHeaderKey[i] = cBuffer.get();
744                 if (subHeaderKey[i] > maxSubHeader) {
745                     maxSubHeader = subHeaderKey[i];
746                 }
747             }
748             /* The value of the subHeaderKey is 8 * the subHeader index,
749              * so the number of subHeaders can be obtained by dividing
750              * this value bv 8 and adding 1.
751              */
752             int numSubHeaders = (maxSubHeader >> 3) +1;
753             firstCodeArray = new char[numSubHeaders];
754             entryCountArray = new char[numSubHeaders];
755             idDeltaArray  = new short[numSubHeaders];
756             idRangeOffSetArray  = new char[numSubHeaders];
757             for (int i=0; i<numSubHeaders; i++) {
758                 firstCodeArray[i] = cBuffer.get();
759                 entryCountArray[i] = cBuffer.get();
760                 idDeltaArray[i] = (short)cBuffer.get();
761                 idRangeOffSetArray[i] = cBuffer.get();
762 //              System.out.println("sh["+i+"]:fc="+(int)firstCodeArray[i]+
763 //                                 " ec="+(int)entryCountArray[i]+
764 //                                 " delta="+(int)idDeltaArray[i]+
765 //                                 " offset="+(int)idRangeOffSetArray[i]);
766             }
767 
768             int glyphIndexArrSize = (tableLen-518-numSubHeaders*8)/2;
769             glyphIndexArray = new char[glyphIndexArrSize];
770             for (int i=0; i<glyphIndexArrSize;i++) {
771                 glyphIndexArray[i] = cBuffer.get();
772             }
773         }
774 
775         char getGlyph(int charCode) {
776             int controlGlyph = getControlCodeGlyph(charCode, true);
777             if (controlGlyph >= 0) {
778                 return (char)controlGlyph;
779             }
780 
781             if (xlat != null) {
782                 charCode = xlat[charCode];
783             }
784 
785             char highByte = (char)(charCode >> 8);
786             char lowByte = (char)(charCode & 0xff);
787             int key = subHeaderKey[highByte]>>3; // index into subHeaders
788             char mapMe;
789 
790             if (key != 0) {
791                 mapMe = lowByte;
792             } else {
793                 mapMe = highByte;
794                 if (mapMe == 0) {
795                     mapMe = lowByte;
796                 }
797             }
798 
799 //          System.err.println("charCode="+Integer.toHexString(charCode)+
800 //                             " key="+key+ " mapMe="+Integer.toHexString(mapMe));
801             char firstCode = firstCodeArray[key];
802             if (mapMe < firstCode) {
803                 return 0;
804             } else {
805                 mapMe -= firstCode;
806             }
807 
808             if (mapMe < entryCountArray[key]) {
809                 /* "address" arithmetic is needed to calculate the offset
810                  * into glyphIndexArray. "idRangeOffSetArray[key]" specifies
811                  * the number of bytes from that location in the table where
812                  * the subarray of glyphIndexes starting at "firstCode" begins.
813                  * Each entry in the subHeader table is 8 bytes, and the
814                  * idRangeOffSetArray field is at offset 6 in the entry.
815                  * The glyphIndexArray immediately follows the subHeaders.
816                  * So if there are "N" entries then the number of bytes to the
817                  * start of glyphIndexArray is (N-key)*8-6.
818                  * Subtract this from the idRangeOffSetArray value to get
819                  * the number of bytes into glyphIndexArray and divide by 2 to
820                  * get the (char) array index.
821                  */
822                 int glyphArrayOffset = ((idRangeOffSetArray.length-key)*8)-6;
823                 int glyphSubArrayStart =
824                         (idRangeOffSetArray[key] - glyphArrayOffset)/2;
825                 char glyphCode = glyphIndexArray[glyphSubArrayStart+mapMe];
826                 if (glyphCode != 0) {
827                     glyphCode += idDeltaArray[key]; //idDelta
828                     return glyphCode;
829                 }
830             }
831             return 0;
832         }
833     }
834 
835     // Format 6: Trimmed table mapping
836     static class CMapFormat6 extends CMap {
837 
838         char firstCode;
839         char entryCount;
840         char[] glyphIdArray;
841 
842         CMapFormat6(ByteBuffer bbuffer, int offset, char[] xlat) {
843 
844              System.err.println("WARNING: CMapFormat8 is untested.");
845              bbuffer.position(offset+6);
846              CharBuffer buffer = bbuffer.asCharBuffer();
847              firstCode = buffer.get();
848              entryCount = buffer.get();
849              glyphIdArray = new char[entryCount];
850              for (int i=0; i< entryCount; i++) {
851                  glyphIdArray[i] = buffer.get();
852              }
853          }
854 
855          char getGlyph(int charCode) {
856             int controlGlyph = getControlCodeGlyph(charCode, true);
857             if (controlGlyph >= 0) {
858                 return (char)controlGlyph;
859             }
860 
861              if (xlat != null) {
862                  charCode = xlat[charCode];
863              }
864 
865              charCode -= firstCode;
866              if (charCode < 0 || charCode >= entryCount) {
867                   return 0;
868              } else {
869                   return glyphIdArray[charCode];
870              }
871          }
872     }
873 
874     // Format 8: mixed 16-bit and 32-bit coverage
875     // Seems unlikely this code will ever get tested as we look for
876     // MS platform Cmaps and MS states (in the Opentype spec on their website)
877     // that MS doesn't support this format
878     static class CMapFormat8 extends CMap {
879          byte[] is32 = new byte[8192];
880          int nGroups;
881          int[] startCharCode;
882          int[] endCharCode;
883          int[] startGlyphID;
884 
885          CMapFormat8(ByteBuffer bbuffer, int offset, char[] xlat) {
886 
887              System.err.println("WARNING: CMapFormat8 is untested.");
888              bbuffer.position(12);
889              bbuffer.get(is32);
890              nGroups = bbuffer.getInt();
891              startCharCode = new int[nGroups];
892              endCharCode   = new int[nGroups];
893              startGlyphID  = new int[nGroups];
894          }
895 
896         char getGlyph(int charCode) {
897             if (xlat != null) {
898                 throw new RuntimeException("xlat array for cmap fmt=8");
899             }
900             return 0;
901         }
902 
903     }
904 
905 
906     // Format 4-byte 10: Trimmed table mapping
907     // Seems unlikely this code will ever get tested as we look for
908     // MS platform Cmaps and MS states (in the Opentype spec on their website)
909     // that MS doesn't support this format
910     static class CMapFormat10 extends CMap {
911 
912          long firstCode;
913          int entryCount;
914          char[] glyphIdArray;
915 
916          CMapFormat10(ByteBuffer bbuffer, int offset, char[] xlat) {
917 
918              System.err.println("WARNING: CMapFormat10 is untested.");
919              firstCode = bbuffer.getInt() & INTMASK;
920              entryCount = bbuffer.getInt() & INTMASK;
921              bbuffer.position(offset+20);
922              CharBuffer buffer = bbuffer.asCharBuffer();
923              glyphIdArray = new char[entryCount];
924              for (int i=0; i< entryCount; i++) {
925                  glyphIdArray[i] = buffer.get();
926              }
927          }
928 
929          char getGlyph(int charCode) {
930 
931              if (xlat != null) {
932                  throw new RuntimeException("xlat array for cmap fmt=10");
933              }
934 
935              int code = (int)(charCode - firstCode);
936              if (code < 0 || code >= entryCount) {
937                  return 0;
938              } else {
939                  return glyphIdArray[code];
940              }
941          }
942     }
943 
944     // Format 12: Segmented coverage for UCS-4 (fonts supporting
945     // surrogate pairs)
946     static class CMapFormat12 extends CMap {
947 
948         int numGroups;
949         int highBit =0;
950         int power;
951         int extra;
952         long[] startCharCode;
953         long[] endCharCode;
954         int[] startGlyphID;
955 
956         CMapFormat12(ByteBuffer buffer, int offset, char[] xlat) {
957             if (xlat != null) {
958                 throw new RuntimeException("xlat array for cmap fmt=12");
959             }
960 
961             numGroups = buffer.getInt(offset+12);
962             startCharCode = new long[numGroups];
963             endCharCode = new long[numGroups];
964             startGlyphID = new int[numGroups];
965             buffer.position(offset+16);
966             buffer = buffer.slice();
967             IntBuffer ibuffer = buffer.asIntBuffer();
968             for (int i=0; i<numGroups; i++) {
969                 startCharCode[i] = ibuffer.get() & INTMASK;
970                 endCharCode[i] = ibuffer.get() & INTMASK;
971                 startGlyphID[i] = ibuffer.get() & INTMASK;
972             }
973 
974             /* Finds the high bit by binary searching through the bits */
975             int value = numGroups;
976 
977             if (value >= 1 << 16) {
978                 value >>= 16;
979                 highBit += 16;
980             }
981 
982             if (value >= 1 << 8) {
983                 value >>= 8;
984                 highBit += 8;
985             }
986 
987             if (value >= 1 << 4) {
988                 value >>= 4;
989                 highBit += 4;
990             }
991 
992             if (value >= 1 << 2) {
993                 value >>= 2;
994                 highBit += 2;
995             }
996 
997             if (value >= 1 << 1) {
998                 value >>= 1;
999                 highBit += 1;
1000             }
1001 
1002             power = 1 << highBit;
1003             extra = numGroups - power;
1004         }
1005 
1006         char getGlyph(int charCode) {
1007             int controlGlyph = getControlCodeGlyph(charCode, false);
1008             if (controlGlyph >= 0) {
1009                 return (char)controlGlyph;
1010             }
1011             int probe = power;
1012             int range = 0;
1013 
1014             if (startCharCode[extra] <= charCode) {
1015                 range = extra;
1016             }
1017 
1018             while (probe > 1) {
1019                 probe >>= 1;
1020 
1021                 if (startCharCode[range+probe] <= charCode) {
1022                     range += probe;
1023                 }
1024             }
1025 
1026             if (startCharCode[range] <= charCode &&
1027                   endCharCode[range] >= charCode) {
1028                 return (char)
1029                     (startGlyphID[range] + (charCode - startCharCode[range]));
1030             }
1031 
1032             return 0;
1033         }
1034 
1035     }
1036 
1037     /* Used to substitute for bad Cmaps. */
1038     static class NullCMapClass extends CMap {
1039 
1040         char getGlyph(int charCode) {
1041             return 0;
1042         }
1043     }
1044 
1045     public static final NullCMapClass theNullCmap = new NullCMapClass();
1046 
1047     final int getControlCodeGlyph(int charCode, boolean noSurrogates) {
1048         if (charCode < 0x0010) {
1049             switch (charCode) {
1050             case 0x0009:
1051             case 0x000a:
1052             case 0x000d: return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1053             }
1054         } else if (charCode >= 0x200c) {
1055             if ((charCode <= 0x200f) ||
1056                 (charCode >= 0x2028 && charCode <= 0x202e) ||
1057                 (charCode >= 0x206a && charCode <= 0x206f)) {
1058                 return CharToGlyphMapper.INVISIBLE_GLYPH_ID;
1059             } else if (noSurrogates && charCode >= 0xFFFF) {
1060                 return 0;
1061             }
1062         }
1063         return -1;
1064     }
1065 }